library(tidyverse)
library(ragg)
library(sf)

# this code was written before the release of sf
sf_use_s2(FALSE)

######
# ML Predictions
######
pred_ASM_robust_files <- list.files("Data/ML Prediction/Predictions",
                                    "prediction_model_ASM_\\d+_individual_hp\\.dill\\.csv")

pred_ASM_robust_seeds <- pred_ASM_robust_files %>% 
  str_split("_|\\.") %>% 
  sapply(`[`, 5) %>% 
  as.numeric()

voted_asm <- 
  lapply(paste0("Data/ML Prediction/Predictions/", pred_ASM_robust_files), read_csv) %>% 
  bind_rows()

#####
# Grids
#####

# this section is needed to subset to the GIDs within the predictive region 
asm_sites_cod <- st_read("Data/IPIS/cod_mines_curated_all_opendata_p_ipis.shp")
asm_sites_tza <- st_read("Data/IPIS/tza_mines_curated_all_opendata_p_ipis.shp")

asm_sites_burkina <- st_read("Data/Burkina ASM/Sites_Potentiels.shp")

grids <- st_read("Data/ML Prediction/Predictors/gridded_covariates_snl.gpkg")

grids_asm <- 
  grids %>% 
  {c(grids$gid[sapply(st_intersects(., asm_sites_cod %>% filter(is_gold_mi == 1)), function(x){length(x)> 0})], 
     grids$gid[sapply(st_intersects(., asm_sites_tza %>% filter(grepl("Gold",mineral1na))), function(x){length(x)> 0})], 
     grids$gid[sapply(st_intersects(., asm_sites_burkina), function(x){length(x)> 0})])}

#####
# Figure
#####

agg_png("./Figures/Figure-A-6-Predicted-vs-Actual.png",
        width = 7, height = 4, units = "in", res = 300)
print(
voted_asm  %>% 
  group_by(gid) %>% 
    summarise(ASM_prob = mean(ASM_prob)) %>% 
  ungroup() %>% 
  mutate(ASM_actual = gid %in% grids_asm) %>% 
  ggplot + 
    geom_boxplot(aes(ASM_actual, ASM_prob), alpha = .05) +
    labs(title = "ASM Probabilities of Predicted vs Actual ASM", y = "Predicted Probability of ASM", x = "Actual ASM") +
    coord_flip()
)
dev.off()
